mathlea_10 <- get_data("EDFacts_math_achievement_lea_2010_2019")

rlalea_10 <- get_data("EDFacts_rla_achievement_lea_2010_2019")

fiscal2010 <- get_data("NCES_CCD_fiscal_district_2010")

mathsch_10 <- get_data("EDFacts_math_achievement_sch_2010_2019")

rlasch_10 <- get_data("EDFacts_rla_achievement_sch_2010_2019")

Data Visualization 1

Rebecca

Research Question:

How do high school students’ subgroup makeup (i.e., Race/ethnicity, Male vs. Female, economically disadvantaged, Limited English, Migrant status, Disability status, and Homelessness) differ among states/regions?

Datasets:

  • EDFacts_math_achievement_sch_2010_2019
  • EDFacts_rla_achievement_sch_2010_2019
df <- full_join(mathsch_10, rlasch_10) 

df2 <- df %>% 
    mutate(ECDmath = gsub("[^0-9.-]", "", ECD_MTHHSPCTPROF),
                 ECDrla = gsub("[^0-9.-]", "", ECD_RLAHSPCTPROF),
                 LEPrla = gsub("[^0-9.-]", "", LEP_RLAHSPCTPROF),
                 LEPmath = gsub("[^0-9.-]", "", LEP_MTHHSPCTPROF),
                 HOMmath = gsub("[^0-9.-]", "", HOM_MTHHSPCTPROF),
                 HOMrla = gsub("[^0-9.-]", "", HOM_RLAHSPCTPROF),
                 Mmath = gsub("[^0-9.-]", "", M_MTHHSPCTPROF),
                 Mrla = gsub("[^0-9.-]", "", M_RLAHSPCTPROF),
                 Fmath = gsub("[^0-9.-]", "", F_MTHHSPCTPROF),
                 Frla = gsub("[^0-9.-]", "", F_RLAHSPCTPROF),
                 MBLmath = gsub("[^0-9.-]", "", MBL_MTHHSPCTPROF),
                 MBLrla = gsub("[^0-9.-]", "", MBL_RLAHSPCTPROF),
                 MHImath = gsub("[^0-9.-]", "", MHI_MTHHSPCTPROF),
                 MHIrla = gsub("[^0-9.-]", "", MHI_RLAHSPCTPROF),
                 MWHmath = gsub("[^0-9.-]", "", MWH_MTHHSPCTPROF),
                 MWHrla = gsub("[^0-9.-]", "", MWH_RLAHSPCTPROF),
                 CWDmath = gsub("[^0-9.-]", "", CWD_MTHHSPCTPROF),
                 CWDrla = gsub("[^0-9.-]", "", CWD_RLAHSPCTPROF))

df2$ECDmath = readr::parse_number(df2$ECDmath) 
df2$ECDrla = readr::parse_number(df2$ECDrla) 
df2$LEPrla = readr::parse_number(df2$LEPrla) 
df2$LEPmath = readr::parse_number(df2$LEPmath) 
df2$HOMrla = readr::parse_number(df2$HOMrla) 
df2$HOMmath = readr::parse_number(df2$HOMmath) 
df2$Mrla = readr::parse_number(df2$Mrla) 
df2$Mmath = readr::parse_number(df2$Mmath) 
df2$Frla = readr::parse_number(df2$Frla) 
df2$Fmath = readr::parse_number(df2$Fmath) 
df2$MBLrla = readr::parse_number(df2$MBLrla) 
df2$MBLmath = readr::parse_number(df2$MBLmath) 
df2$MHIrla = readr::parse_number(df2$MHIrla) 
df2$MHImath = readr::parse_number(df2$MHImath) 
df2$MWHmath = readr::parse_number(df2$MWHmath) 
df2$MWHrla = readr::parse_number(df2$MWHrla) 
df2$CWDmath = readr::parse_number(df2$CWDmath) 
df2$CWDrla = readr::parse_number(df2$CWDrla) 

dat <- df2 %>% 
    group_by(STNAM) %>% 
    summarise(
        mean_EconmoicallyDisadvantaged_math = mean(ECDmath, na.rm = TRUE),
        mean_EconomicallyDisadvantaged_rla = mean(ECDrla, na.rm = TRUE),
        mean_EnglishLearner_math = mean(LEPmath, na.rm = TRUE),
        mean_EnglishLearner_rla = mean(LEPrla, na.rm = TRUE),
        mean_Homeless_math = mean(HOMmath, na.rm = TRUE),
        mean_Homeless_rla = mean(HOMrla, na.rm = TRUE),
        mean_Male_math = mean(Mmath, na.rm = TRUE),
        mean_Male_rla = mean(Mrla, na.rm = TRUE),
        mean_Female_math = mean(Fmath, na.rm = TRUE),
        mean_Female_rla = mean(Frla, na.rm = TRUE),
        mean_Black_math = mean(MBLmath, na.rm = TRUE),
        mean_Black_rla = mean(MBLrla, na.rm = TRUE),
        mean_Hispanic_math = mean(MHImath, na.rm = TRUE),
        mean_Hispanic_rla = mean(MHIrla, na.rm = TRUE),
        mean_White_math = mean(MWHmath, na.rm = TRUE),
        mean_White_rla = mean(MWHrla, na.rm = TRUE),
        mean_Disabled_math = mean(CWDmath, na.rm = TRUE),
        mean_Disabled_rla = mean(CWDrla, na.rm = TRUE)
    ) 

dat <- dat %>% filter(!STNAM == "BUREAU OF INDIAN AFFAIRS" & !STNAM == "stnam" & !STNAM == "PUERTO RICO") 

regions <- import(here("data","us_census_bureau_regions_and_divisions.csv")) %>%
    rename(state = `State Code`, `STNAM` = State)

df_regions <- regex_inner_join(dat, regions,ignore_case = TRUE) 

df_pivot <- df_regions %>% 
    pivot_longer(
        cols = starts_with("mean_"),
        names_to = c("Subgroup", "test"),
        values_to = "mean_pct",
        names_sep = "_",
        values_drop_na = TRUE,
        names_repair = "check_unique",
        names_prefix = "mean_"
    ) 

df_pivot$STNAM.y <- as.factor(df_pivot$STNAM.y)
df_pivot$Subgroup <- as.factor(df_pivot$Subgroup)
#Plot showing mean test score by subgroup and test
#AW: I'm getting this error message: Error in median.default(X[[i]], ...) : need numeric data

# df_pivot %>%   
#   ggplot(aes(x = mean_pct, 
#                        y = fct_reorder(STNAM.y, Subgroup),
#                        fill = mean_pct,
#                        color = test)) +
#   geom_point(size = .8,
#                        alpha = .7) +
#   facet_wrap(~Subgroup) +
#   guides(fill = "none") +
#   scale_color_discrete(l = 45,
#                                            name = "Test",
#                                            labels = c("Math", "Reading")) +
#   labs(title = 'Mean percentage of students that scored at or above proficient',
#            x = 'Mean percent proficient',
#            y = 'State') +
#   theme_minimal()


#Plot showing mean test score by subgroup and test by state

df_pivot %>%   
    ggplot(aes(x = mean_pct, 
                         y = fct_reorder(Subgroup, mean_pct))) +
    geom_jitter(aes(color = test),
                            size = .9,
                            alpha = .8) +
    facet_wrap(~STNAM.y) +
    scale_color_discrete(l = 70,
                                             name = "Test",
                                             labels = c("Math", "Reading")) +
    labs(title = 'Mean percentage of students that scored at or above proficient',
             x = 'Mean percent proficient',
             y = 'Group') 

#Plot showing mean test score by subgroup and test by region

ggplot(df_pivot, aes(x = mean_pct, 
                                         y = fct_reorder(Subgroup, mean_pct), 
                                         fill = Region,
                                         alpha = .7)) +
    guides(alpha = "none") +
    geom_density_ridges(rel_min_height = 0.005) +
    theme_minimal() +
    labs(title = 'Mean percentage of students that scored at or above proficient',
             x = 'Mean percent proficient',
             y = 'Group') 

#Trying to figure out how to show test scores separated by Gender but the plot won't create separate values for each

#AW: I'm getting this error message: Error: Discrete value supplied to continuous scale

# df_pivot %>% 
#   filter(Subgroup %in% c("Male", "Female")) %>% 
#   group_by(Subgroup) %>% 
#   ggplot(aes(x = mean_pct,
#                        y = fct_reorder(STNAM.y, mean_pct),
#                        fill = paste(STNAM.y, Subgroup))) + 
#   geom_density_ridges_gradient(scale = 2, size = 0.3, rel_min_height = 0.001) +
#   scale_fill_viridis_c(name = "%", option = "C") +
#   coord_cartesian(clip = "off") +
#   theme_ridges(grid = FALSE) +
#   labs(title = 'Mean percentage of economically disadvantaged students that scored at or above proficient',
#            x = 'Mean percent proficient',
#            y = 'State') 
#Maps
#AW: Should map titles include "economically disadvantaged?" They look like they include all student subgroups to me. 

us <- usa_sf()

us <- rename(us, state = iso_3166_2)

df_geo <- inner_join(df_pivot, us, by = "state")

dat2 <- full_join(df_pivot, df_geo)

#Math only
dat2 %>% 
    filter(test == "math") %>% 
    ggplot(aes(geometry = geometry, 
                         fill = mean_pct,
                         color = test), 
                 alpha = 0.9) + 
    facet_wrap(~Subgroup) +
    geom_sf(color = "white", size = 0) +
    guides(color = "none") +
    scale_fill_viridis(name = "%",
                                         breaks = c(0, 20, 40, 60, 80)) +
    labs(title = "Mean percentage of economically disadvantaged students that scored at or above proficient",
             subtitle = "Math",
             caption = "Source: U.S. Department of Education") +
    theme_void()

#Reading only
dat2 %>% 
    filter(test == "rla") %>% 
    ggplot(aes(geometry = geometry, 
                         fill = mean_pct,
                         color = test), 
                 alpha = 0.9) + 
    facet_wrap(~Subgroup) +
    geom_sf(color = "white", size = 0) +
    guides(color = "none") +
    scale_fill_viridis(name = "%",
                                         breaks = c(0, 20, 40, 60, 80)) +
    labs(title = "Mean percentage of economically disadvantaged students that scored at or above proficient",
             subtitle = "Reading",
             caption = "Source: U.S. Department of Education") +
    theme_void()

#combined map
ggplot(data = dat2, aes(geometry = geometry, 
                                                fill = mean_pct,
                                                color = test), 
             alpha = 0.9) + 
    facet_wrap(~Subgroup) +
    geom_sf(color = "white", size = 0) +
    guides(color = "none") +
    scale_fill_viridis(name = "%",
                                         breaks = c(0, 20, 40, 60, 80)) +
    labs(title = "Mean percentage of economically disadvantaged students that scored at or above proficient",
             subtitle = "Across Math and Reading",
             caption = "Source: U.S. Department of Education") +
    theme_void()

Data Visualization 2

Ksenia

Research Question:

What drives current expenditure on education? How is funding allocated by state and how do the funding allocations correlate with eh student performance (graduation rates and/or test scores)? Does cross-state variation in expenditure explain the cross-state variation in education outcomes? More specifically, I might look at the following:

  • What percentage of expenditure accounts for instruction/ textbooks/ technology-related equipment & services/ instructional equipment?
  • What are the gross amounts spent on those categories?
  • How do those percentages/ gross amounts correlate with the diversity of the student population/portion of white students in school?
  • Does increased spending on any of the categories correlate with increased students’ performance?
  • Can higher teachers’ salaries result in better test scores?
  • What amount of funding is spent on special education programs and how that correlates with performance of students with disabilities?

Datasets:

  • EDFacts_rla_achievement_lea_2010_2019
  • EDFacts_math_achievement_lea_2010_2019
  • NCES_CCD_fiscal_district_2010
colnames(fiscal2010)
##   [1] "A07"                   "A08"                   "A09"                  
##   [4] "A11"                   "A13"                   "A15"                  
##   [7] "A20"                   "A40"                   "AGCHRT"               
##  [10] "B10"                   "B11"                   "B12"                  
##  [13] "B13"                   "C01"                   "C04"                  
##  [16] "C05"                   "C06"                   "C07"                  
##  [19] "C08"                   "C09"                   "C10"                  
##  [22] "C11"                   "C12"                   "C13"                  
##  [25] "C14"                   "C15"                   "C16"                  
##  [28] "C17"                   "C19"                   "C20"                  
##  [31] "C24"                   "C25"                   "C35"                  
##  [34] "C36"                   "C38"                   "C39"                  
##  [37] "CBSA"                  "CCDNF"                 "CENFILE"              
##  [40] "CENSUSID"              "CONUM"                 "CSA"                  
##  [43] "D11"                   "D23"                   "E07"                  
##  [46] "E08"                   "E09"                   "E11"                  
##  [49] "E13"                   "E17"                   "F12"                  
##  [52] "FIPST"                 "FL_19H"                "FL_21F"               
##  [55] "FL_31F"                "FL_41F"                "FL_61V"               
##  [58] "FL_66V"                "FL_A07"                "FL_A08"               
##  [61] "FL_A09"                "FL_A11"                "FL_A13"               
##  [64] "FL_A15"                "FL_A20"                "FL_A40"               
##  [67] "FL_B10"                "FL_B11"                "FL_B12"               
##  [70] "FL_B13"                "FL_C01"                "FL_C04"               
##  [73] "FL_C05"                "FL_C06"                "FL_C07"               
##  [76] "FL_C08"                "FL_C09"                "FL_C10"               
##  [79] "FL_C11"                "FL_C12"                "FL_C13"               
##  [82] "FL_C14"                "FL_C15"                "FL_C16"               
##  [85] "FL_C17"                "FL_C19"                "FL_C20"               
##  [88] "FL_C24"                "FL_C25"                "FL_C35"               
##  [91] "FL_C36"                "FL_C38"                "FL_C39"               
##  [94] "FL_D11"                "FL_D23"                "FL_E07"               
##  [97] "FL_E08"                "FL_E09"                "FL_E11"               
## [100] "FL_E13"                "FL_E17"                "FL_F12"               
## [103] "FL_G15"                "FL_HE1"                "FL_HE2"               
## [106] "FL_HR1"                "FL_I86"                "FL_K09"               
## [109] "FL_K10"                "FL_K11"                "FL_L12"               
## [112] "FL_M12"                "FL_MEMBERSCH"          "FL_Q11"               
## [115] "FL_T02"                "FL_T06"                "FL_T09"               
## [118] "FL_T15"                "FL_T40"                "FL_T99"               
## [121] "FL_U11"                "FL_U22"                "FL_U30"               
## [124] "FL_U50"                "FL_U97"                "FL_V10"               
## [127] "FL_V11"                "FL_V12"                "FL_V13"               
## [130] "FL_V14"                "FL_V15"                "FL_V16"               
## [133] "FL_V17"                "FL_V18"                "FL_V21"               
## [136] "FL_V22"                "FL_V23"                "FL_V24"               
## [139] "FL_V29"                "FL_V30"                "FL_V32"               
## [142] "FL_V33"                "FL_V37"                "FL_V38"               
## [145] "FL_V40"                "FL_V45"                "FL_V60"               
## [148] "FL_V65"                "FL_V70"                "FL_V75"               
## [151] "FL_V80"                "FL_V85"                "FL_V90"               
## [154] "FL_V91"                "FL_V92"                "FL_V93"               
## [157] "FL_W01"                "FL_W31"                "FL_W61"               
## [160] "FL_Z32"                "FL_Z33"                "FL_Z34"               
## [163] "FL_Z35"                "FL_Z36"                "FL_Z37"               
## [166] "FL_Z38"                "FILEURL"               "G15"                  
## [169] "GSHI"                  "GSLO"                  "HE1"                  
## [172] "HE2"                   "HR1"                   "I86"                  
## [175] "K09"                   "K10"                   "K11"                  
## [178] "L12"                   "LEAID"                 "M12"                  
## [181] "MEMBERSCH"             "NAME"                  "Q11"                  
## [184] "SCHLEV"                "STABBR"                "STNAME"               
## [187] "T02"                   "T06"                   "T09"                  
## [190] "T15"                   "T40"                   "T99"                  
## [193] "TCAPOUT"               "TCURELSC"              "TCURINST"             
## [196] "TCUROTH"               "TCURSSVC"              "TFEDREV"              
## [199] "TLOCREV"               "TNONELSE"              "TOTALEXP"             
## [202] "TOTALREV"              "TSTREV"                "U11"                  
## [205] "U22"                   "U30"                   "U50"                  
## [208] "U97"                   "V10"                   "V11"                  
## [211] "V12"                   "V13"                   "V14"                  
## [214] "V15"                   "V16"                   "V17"                  
## [217] "V18"                   "V21"                   "V22"                  
## [220] "V23"                   "V24"                   "V29"                  
## [223] "V30"                   "V32"                   "V33"                  
## [226] "V37"                   "V38"                   "V40"                  
## [229] "V45"                   "V60"                   "V65"                  
## [232] "V70"                   "V75"                   "V80"                  
## [235] "V85"                   "V90"                   "V91"                  
## [238] "V92"                   "V93"                   "W01"                  
## [241] "W31"                   "W61"                   "WEIGHT"               
## [244] "Z32"                   "Z33"                   "Z34"                  
## [247] "Z35"                   "Z36"                   "Z37"                  
## [250] "Z38"                   "_19H"                  "_21F"                 
## [253] "_31F"                  "_41F"                  "_61V"                 
## [256] "_66V"                  "_YEAR"                 "YEAR"                 
## [259] "PIPELINE"              "DL_INGESTION_DATETIME"
skim(fiscal2010)
Data summary
Name fiscal2010
Number of rows 18247
Number of columns 260
_______________________
Column type frequency:
character 129
numeric 130
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
AGCHRT 0 1 1 1 0 4 0
CBSA 0 1 1 5 0 939 0
CENSUSID 0 1 1 14 0 14842 0
CONUM 0 1 5 5 0 3129 0
CSA 0 1 1 3 0 125 0
FIPST 0 1 2 2 0 51 0
FL_19H 0 1 1 1 0 4 0
FL_21F 0 1 1 1 0 4 0
FL_31F 0 1 1 1 0 4 0
FL_41F 0 1 1 1 0 4 0
FL_61V 0 1 1 1 0 3 0
FL_66V 0 1 1 1 0 4 0
FL_A07 0 1 1 1 0 4 0
FL_A08 0 1 1 1 0 3 0
FL_A09 0 1 1 1 0 3 0
FL_A11 0 1 1 1 0 3 0
FL_A13 0 1 1 1 0 3 0
FL_A15 0 1 1 1 0 4 0
FL_A20 0 1 1 1 0 4 0
FL_A40 0 1 1 1 0 4 0
FL_B10 0 1 1 1 0 3 0
FL_B11 0 1 1 1 0 3 0
FL_B12 0 1 1 1 0 4 0
FL_B13 0 1 1 1 0 3 0
FL_C01 0 1 1 1 0 4 0
FL_C04 0 1 1 1 0 4 0
FL_C05 0 1 1 1 0 3 0
FL_C06 0 1 1 1 0 3 0
FL_C07 0 1 1 1 0 4 0
FL_C08 0 1 1 1 0 3 0
FL_C09 0 1 1 1 0 4 0
FL_C10 0 1 1 1 0 3 0
FL_C11 0 1 1 1 0 3 0
FL_C12 0 1 1 1 0 3 0
FL_C13 0 1 1 1 0 4 0
FL_C14 0 1 1 1 0 4 0
FL_C15 0 1 1 1 0 3 0
FL_C16 0 1 1 1 0 3 0
FL_C17 0 1 1 1 0 3 0
FL_C19 0 1 1 1 0 3 0
FL_C20 0 1 1 1 0 4 0
FL_C24 0 1 1 1 0 3 0
FL_C25 0 1 1 1 0 4 0
FL_C35 0 1 1 1 0 3 0
FL_C36 0 1 1 1 0 4 0
FL_C38 0 1 1 1 0 4 0
FL_C39 0 1 1 1 0 4 0
FL_D11 0 1 1 1 0 4 0
FL_D23 0 1 1 1 0 4 0
FL_E07 0 1 1 1 0 5 0
FL_E08 0 1 1 1 0 5 0
FL_E09 0 1 1 1 0 5 0
FL_E11 0 1 1 1 0 4 0
FL_E13 0 1 1 1 0 5 0
FL_E17 0 1 1 1 0 5 0
FL_F12 0 1 1 1 0 4 0
FL_G15 0 1 1 1 0 4 0
FL_HE1 0 1 1 1 0 4 0
FL_HE2 0 1 1 1 0 4 0
FL_HR1 0 1 1 1 0 4 0
FL_I86 0 1 1 1 0 4 0
FL_K09 0 1 1 1 0 5 0
FL_K10 0 1 1 1 0 4 0
FL_K11 0 1 1 1 0 4 0
FL_L12 0 1 1 1 0 3 0
FL_M12 0 1 1 1 0 3 0
FL_MEMBERSCH 0 1 1 1 0 3 0
FL_Q11 0 1 1 1 0 4 0
FL_T02 0 1 1 1 0 4 0
FL_T06 0 1 1 1 0 4 0
FL_T09 0 1 1 1 0 3 0
FL_T15 0 1 1 1 0 3 0
FL_T40 0 1 1 1 0 3 0
FL_T99 0 1 1 1 0 4 0
FL_U11 0 1 1 1 0 3 0
FL_U22 0 1 1 1 0 4 0
FL_U30 0 1 1 1 0 3 0
FL_U50 0 1 1 1 0 4 0
FL_U97 0 1 1 1 0 4 0
FL_V10 0 1 1 1 0 5 0
FL_V11 0 1 1 1 0 4 0
FL_V12 0 1 1 1 0 4 0
FL_V13 0 1 1 1 0 3 0
FL_V14 0 1 1 1 0 4 0
FL_V15 0 1 1 1 0 3 0
FL_V16 0 1 1 1 0 4 0
FL_V17 0 1 1 1 0 3 0
FL_V18 0 1 1 1 0 4 0
FL_V21 0 1 1 1 0 3 0
FL_V22 0 1 1 1 0 4 0
FL_V23 0 1 1 1 0 4 0
FL_V24 0 1 1 1 0 5 0
FL_V29 0 1 1 1 0 3 0
FL_V30 0 1 1 1 0 5 0
FL_V32 0 1 1 1 0 3 0
FL_V33 0 1 1 1 0 3 0
FL_V37 0 1 1 1 0 4 0
FL_V38 0 1 1 1 0 5 0
FL_V40 0 1 1 1 0 4 0
FL_V45 0 1 1 1 0 5 0
FL_V60 0 1 1 1 0 4 0
FL_V65 0 1 1 1 0 4 0
FL_V70 0 1 1 1 0 3 0
FL_V75 0 1 1 1 0 3 0
FL_V80 0 1 1 1 0 4 0
FL_V85 0 1 1 1 0 4 0
FL_V90 0 1 1 1 0 5 0
FL_V91 0 1 1 1 0 4 0
FL_V92 0 1 1 1 0 3 0
FL_V93 0 1 1 1 0 4 0
FL_W01 0 1 1 1 0 4 0
FL_W31 0 1 1 1 0 4 0
FL_W61 0 1 1 1 0 4 0
FL_Z32 0 1 1 1 0 4 0
FL_Z33 0 1 1 1 0 4 0
FL_Z34 0 1 1 1 0 5 0
FL_Z35 0 1 1 1 0 4 0
FL_Z36 0 1 1 1 0 3 0
FL_Z37 0 1 1 1 0 4 0
FL_Z38 0 1 1 1 0 3 0
FILEURL 0 1 48 48 0 1 0
GSHI 0 1 1 2 0 16 0
GSLO 0 1 1 2 0 16 0
LEAID 0 1 7 7 0 18247 0
NAME 0 1 3 60 0 17717 0
SCHLEV 0 1 1 2 0 7 0
STABBR 0 1 2 2 0 51 0
STNAME 0 1 4 20 0 51 0
PIPELINE 0 1 29 29 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
A07 0 1 62321.43 402543.33 -2 0 0 9000.0 20229000 ▇▁▁▁▁
A08 0 1 5939.67 128277.93 -2 0 0 0.0 15564000 ▇▁▁▁▁
A09 0 1 364123.59 1144107.53 -2 3000 79000 319000.0 47205000 ▇▁▁▁▁
A11 0 1 10957.26 85439.61 -2 0 0 0.0 3510000 ▇▁▁▁▁
A13 0 1 211909.26 1823605.27 -2 0 8000 97000.0 171949000 ▇▁▁▁▁
A15 0 1 8501.85 191879.04 -2 0 0 0.0 14939000 ▇▁▁▁▁
A20 0 1 91805.46 808818.77 -2 0 0 3000.0 49072000 ▇▁▁▁▁
A40 0 1 41828.48 293969.27 -2 0 0 9000.0 19348000 ▇▁▁▁▁
B10 0 1 68834.01 917277.92 -2 0 0 0.0 59398000 ▇▁▁▁▁
B11 0 1 19931.51 259028.50 -2 0 0 0.0 28169000 ▇▁▁▁▁
B12 0 1 5454.66 56806.67 -2 0 0 0.0 3111000 ▇▁▁▁▁
B13 0 1 162690.86 2006666.21 -2 0 0 24000.0 213069000 ▇▁▁▁▁
C01 0 1 9904463.16 56726682.92 -2 481000 2407000 7717500.0 6016802000 ▇▁▁▁▁
C04 0 1 177204.10 1791292.67 -2 0 0 0.0 139157000 ▇▁▁▁▁
C05 0 1 918167.93 12489751.14 -2 0 0 331000.0 1445472000 ▇▁▁▁▁
C06 0 1 255034.54 3212965.61 -2 0 0 47000.0 262095000 ▇▁▁▁▁
C07 0 1 33335.24 848165.31 -2 0 0 0.0 52887000 ▇▁▁▁▁
C08 0 1 23623.02 667675.15 -2 0 0 0.0 68096000 ▇▁▁▁▁
C09 0 1 43071.09 351589.50 -2 0 0 0.0 16140000 ▇▁▁▁▁
C10 0 1 28732.95 385107.93 -2 0 2000 12000.0 42871000 ▇▁▁▁▁
C11 0 1 340673.71 3307919.59 -2 0 0 0.0 289060000 ▇▁▁▁▁
C12 0 1 219562.73 1235888.02 -2 0 0 68000.0 76286000 ▇▁▁▁▁
C13 0 1 1467624.72 15568842.66 -2 1000 76000 506000.0 1684314000 ▇▁▁▁▁
C14 0 1 928407.42 8605443.22 -2 23000 146000 508000.0 777993000 ▇▁▁▁▁
C15 0 1 783204.43 4229045.40 -2 0 69000 473000.0 257678000 ▇▁▁▁▁
C16 0 1 91089.01 662844.07 -2 0 0 43000.0 58822000 ▇▁▁▁▁
C17 0 1 13732.90 128064.08 -2 0 0 4500.0 10177000 ▇▁▁▁▁
C19 0 1 34663.02 223558.75 -2 0 0 2000.0 15750000 ▇▁▁▁▁
C20 0 1 1216737.06 6249021.27 -2 1000 178000 753000.0 394540000 ▇▁▁▁▁
C24 0 1 198884.54 5846320.83 -2 0 0 0.0 729737000 ▇▁▁▁▁
C25 0 1 658136.53 4121046.74 -2 11000 118000 399500.0 321606000 ▇▁▁▁▁
C35 0 1 111686.04 1947391.23 -2 0 0 0.0 208110000 ▇▁▁▁▁
C36 0 1 141945.32 4773984.42 -2 0 0 0.0 637078000 ▇▁▁▁▁
C38 0 1 631631.02 3267240.10 -2 0 0 161000.0 144110000 ▇▁▁▁▁
C39 0 1 31920.66 320989.51 -2 0 0 0.0 19306000 ▇▁▁▁▁
CCDNF 0 1 1.00 0.04 0 1 1 1.0 1 ▁▁▁▁▇
CENFILE 0 1 0.81 0.39 0 1 1 1.0 1 ▂▁▁▁▇
D11 0 1 603561.64 3194163.22 -2 0 0 208000.0 201357000 ▇▁▁▁▁
D23 0 1 442007.85 3606971.99 -2 0 0 46000.0 340982000 ▇▁▁▁▁
E07 0 1 1366785.35 6893020.79 -2 34000 227000 952000.0 479925000 ▇▁▁▁▁
E08 0 1 550224.54 1818703.71 -2 101000 283000 593000.0 144467000 ▇▁▁▁▁
E09 0 1 1559184.15 7539644.20 -2 102000 381000 1206000.0 528651000 ▇▁▁▁▁
E11 0 1 1071170.56 5492398.66 -2 50000 261000 830000.0 448671000 ▇▁▁▁▁
E13 0 1 17327081.34 127893156.30 -2 1171000 4198000 13455500.0 14936045000 ▇▁▁▁▁
E17 0 1 1578756.96 6042710.01 -2 46000 286000 1203000.0 333309000 ▇▁▁▁▁
F12 0 1 2583185.58 27822230.17 -2 0 37000 675000.0 3044559000 ▇▁▁▁▁
G15 0 1 181778.44 1733083.39 -2 0 0 0.0 106072000 ▇▁▁▁▁
HE1 0 1 1388857.91 10400685.61 -2 55000 289000 968000.0 1050214000 ▇▁▁▁▁
HE2 0 1 64137.90 490090.60 -2 0 0 24000.0 36903000 ▇▁▁▁▁
HR1 0 1 218646.58 2227187.59 -2 0 26000 115000.0 232282000 ▇▁▁▁▁
I86 0 1 971108.03 6437069.74 -2 0 64000 529000.0 439446000 ▇▁▁▁▁
K09 0 1 137397.72 665773.23 -2 0 18000 95000.0 48388000 ▇▁▁▁▁
K10 0 1 353665.65 1589523.52 -2 2000 65000 242000.0 61852000 ▇▁▁▁▁
K11 0 1 23581.48 246991.88 -2 0 0 0.0 12401000 ▇▁▁▁▁
L12 0 1 80185.74 1349349.83 -2 0 0 0.0 111938000 ▇▁▁▁▁
M12 0 1 11888.16 391698.39 -2 0 0 0.0 49058000 ▇▁▁▁▁
MEMBERSCH 0 1 2690.61 12815.67 -9 167 641 2083.5 1014020 ▇▁▁▁▁
Q11 0 1 702918.96 7421719.40 -2 0 33000 283000.0 634364000 ▇▁▁▁▁
T02 0 1 2639423.57 73594947.69 -2 -2 -2 -2.0 9073697000 ▇▁▁▁▁
T06 0 1 9271963.74 39717090.27 -2 -2 1187000 5982000.0 1818529000 ▇▁▁▁▁
T09 0 1 203461.80 3394275.08 -2 -2 0 0.0 161332000 ▇▁▁▁▁
T15 0 1 19918.86 255405.26 -2 -2 0 0.0 20748000 ▇▁▁▁▁
T40 0 1 99695.20 1287236.16 -2 -2 0 0.0 110682000 ▇▁▁▁▁
T99 0 1 74279.41 909702.46 -2 -2 0 0.0 58982000 ▇▁▁▁▁
TCAPOUT 0 1 3279609.48 29521711.94 -2 35000 285000 1333000.0 3151607000 ▇▁▁▁▁
TCURELSC 0 1 28436776.91 179053170.29 -2 2109000 7146000 22648500.0 19453219000 ▇▁▁▁▁
TCURINST 0 1 17327081.34 127893156.30 -2 1171000 4198000 13455500.0 14936045000 ▇▁▁▁▁
TCUROTH 0 1 1140166.50 5600778.27 -2 55500 283000 874000.0 448671000 ▇▁▁▁▁
TCURSSVC 0 1 9969528.76 48510265.33 -2 776000 2588000 8035000.0 4068503000 ▇▁▁▁▁
TFEDREV 0 1 4124828.42 25252763.77 -2 261000 899000 2680000.0 2047926000 ▇▁▁▁▁
TLOCREV 0 1 15170253.20 97409268.54 -2 499000 3031000 10780000.0 10600597000 ▇▁▁▁▁
TNONELSE 0 1 363810.50 3018302.79 -2 0 3000 109000.0 175673000 ▇▁▁▁▁
TOTALEXP 0 1 34140053.39 224183538.28 -2 2408000 8445000 26676000.0 24597709000 ▇▁▁▁▁
TOTALREV 0 1 33481814.83 199406458.00 -2 2434500 8493000 26767000.0 21023695000 ▇▁▁▁▁
TSTREV 0 1 14186732.90 85066587.68 -2 945000 3617000 11322500.0 8375172000 ▇▁▁▁▁
U11 0 1 20131.32 332556.14 -2 0 0 0.0 28720000 ▇▁▁▁▁
U22 0 1 108350.53 735676.31 -2 1000 12000 56000.0 73023000 ▇▁▁▁▁
U30 0 1 19000.78 222943.67 -2 0 0 0.0 19854000 ▇▁▁▁▁
U50 0 1 52941.97 586124.91 -2 0 0 13000.0 48929000 ▇▁▁▁▁
U97 0 1 619238.19 11118869.62 -2 6000 52000 244000.0 1421630000 ▇▁▁▁▁
V10 0 1 3998687.90 38204603.80 -2 186000 870000 3139000.0 4756409000 ▇▁▁▁▁
V11 0 1 1053954.52 4087112.12 -2 15000 176000 789500.0 243582000 ▇▁▁▁▁
V12 0 1 336373.22 1395789.52 -2 2000 51000 247000.0 83188000 ▇▁▁▁▁
V13 0 1 806004.12 4234643.26 -2 10000 118000 543000.0 321611000 ▇▁▁▁▁
V14 0 1 263026.59 1390817.64 -2 2000 37000 186000.0 108630000 ▇▁▁▁▁
V15 0 1 239975.18 825385.53 -2 34000 142000 269000.0 82474000 ▇▁▁▁▁
V16 0 1 92164.15 323598.91 -2 8000 42000 99000.0 25761000 ▇▁▁▁▁
V17 0 1 1107942.03 5242875.32 -2 63500 257000 835000.0 341063000 ▇▁▁▁▁
V18 0 1 370250.30 2069478.16 -2 15000 79000 289000.0 160978000 ▇▁▁▁▁
V21 0 1 978273.54 6346261.99 -2 31000 184000 681500.0 661475000 ▇▁▁▁▁
V22 0 1 388822.12 3028190.56 -2 8000 68000 276000.0 327418000 ▇▁▁▁▁
V23 0 1 416869.36 1827291.51 -2 0 46000 272500.0 68954000 ▇▁▁▁▁
V24 0 1 167106.71 771339.05 -2 0 12000 97000.0 34626000 ▇▁▁▁▁
V29 0 1 357825.95 2204270.87 -2 0 69000 259000.0 221534000 ▇▁▁▁▁
V30 0 1 131409.06 743125.50 -2 0 20000 89000.0 69482000 ▇▁▁▁▁
V32 0 1 4498.39 71129.59 -2 0 0 0.0 4338000 ▇▁▁▁▁
V33 0 1 2696.60 12832.19 -9 165 643 2092.0 1014020 ▇▁▁▁▁
V37 0 1 446569.69 2413022.59 -2 0 87000 315000.0 237834000 ▇▁▁▁▁
V38 0 1 187349.27 1238275.08 -2 0 29000 122000.0 117724000 ▇▁▁▁▁
V40 0 1 2712894.19 16354709.32 -2 187000 690000 2114500.0 1661029000 ▇▁▁▁▁
V45 0 1 1219745.94 8586494.05 -2 35000 269000 978500.0 1024981000 ▇▁▁▁▁
V60 0 1 62758.66 644426.67 -2 0 0 0.0 31334000 ▇▁▁▁▁
V65 0 1 6236.98 120315.98 -2 0 0 0.0 9815000 ▇▁▁▁▁
V70 0 1 201312.99 1906604.78 -2 0 0 43000.0 118338000 ▇▁▁▁▁
V75 0 1 110767.97 1647700.62 -2 0 0 0.0 148516000 ▇▁▁▁▁
V80 0 1 51729.23 615328.22 -2 0 0 0.0 40513000 ▇▁▁▁▁
V85 0 1 248.65 33609.35 -2 0 0 0.0 4540000 ▇▁▁▁▁
V90 0 1 981687.90 6017663.61 -2 43000 202000 676000.0 591836000 ▇▁▁▁▁
V91 0 1 180739.64 5702851.48 -2 0 0 0.0 738402000 ▇▁▁▁▁
V92 0 1 113014.75 3320447.49 -2 0 0 0.0 383908000 ▇▁▁▁▁
V93 0 1 132874.73 1178546.64 -2 0 8000 70000.0 115987000 ▇▁▁▁▁
W01 0 1 864911.54 8526711.15 -2 0 0 182000.0 701763000 ▇▁▁▁▁
W31 0 1 2592562.11 31381207.74 -2 0 0 132000.0 3650875000 ▇▁▁▁▁
W61 0 1 5791300.75 25187070.39 -2 79000 1284000 4379500.0 2178242000 ▇▁▁▁▁
WEIGHT 0 1 1.00 0.00 1 1 1 1.0 1 ▁▁▇▁▁
Z32 0 1 17275789.18 99544925.43 -2 1078000 4110000 13449000.0 10254583000 ▇▁▁▁▁
Z33 0 1 11689210.40 76409484.15 -2 719000 2755000 9035000.0 8613741000 ▇▁▁▁▁
Z34 0 1 6003717.44 46330908.53 -2 284000 1321000 4773000.0 5409968000 ▇▁▁▁▁
Z35 0 1 5979280.17 39123700.39 -2 0 497000 4110500.0 4011575000 ▇▁▁▁▁
Z36 0 1 1360212.70 14071446.23 -2 0 41000 776000.0 1713079000 ▇▁▁▁▁
Z37 0 1 234278.74 3988673.40 -2 0 0 89000.0 524807000 ▇▁▁▁▁
Z38 0 1 296060.84 1767268.94 -2 0 0 111000.0 78518000 ▇▁▁▁▁
_19H 0 1 21207890.62 143384108.88 -2 0 1330000 12291500.0 11615909000 ▇▁▁▁▁
_21F 0 1 2646400.13 39506333.52 -2 0 0 0.0 4342818000 ▇▁▁▁▁
_31F 0 1 1937930.41 16398282.10 -2 0 138000 925000.0 1210275000 ▇▁▁▁▁
_41F 0 1 21921404.30 163411365.20 -2 0 1347000 12785500.0 12642529000 ▇▁▁▁▁
_61V 0 1 502922.79 6044542.90 -2 0 0 0.0 625105000 ▇▁▁▁▁
_66V 0 1 437682.37 3431237.57 -2 0 0 0.0 230000000 ▇▁▁▁▁
_YEAR 0 1 10.00 0.00 10 10 10 10.0 10 ▁▁▇▁▁
YEAR 0 1 2010.00 0.00 2010 2010 2010 2010.0 2010 ▁▁▇▁▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
DL_INGESTION_DATETIME 0 1 2021-09-02 13:02:24 2021-09-02 13:02:24 2021-09-02 13:02:24 1
# Total spendings by state
total <-fiscal2010 %>% 
  group_by(STNAME) %>%
  summarise(TOTALEXP = sum(TOTALEXP))

total  %>%
  ggplot(aes(TOTALEXP, STNAME)) +
  geom_col()

#in descending order
total  %>%
  ggplot(aes(TOTALEXP, fct_reorder(STNAME, TOTALEXP))) +
  geom_col() 

#spending on instruction by state 
instruction <- fiscal2010 %>%
  group_by(STNAME) %>%
      summarise(E13 = sum(E13))

instruction  %>%
  ggplot(aes(E13, STNAME)) +
  geom_col()

instruction  %>%
  ggplot(aes(E13, fct_reorder(STNAME, E13))) +
  geom_col()

#New York spends more on instruction, while CA spends the most total

#spending on textbooks by state 
textbooks <- fiscal2010 %>%
  group_by(STNAME) %>%
      summarise(V93 = sum(V93))

textbooks  %>%
  ggplot(aes(V93, fct_reorder(STNAME, V93))) +
  geom_col()

#spending on Special Ed teachers
SpEd <- fiscal2010 %>%
  group_by(STNAME) %>%
      summarise(Z36 = sum(Z36))

SpEd  %>%
  ggplot(aes(Z36, fct_reorder(STNAME, Z36))) +
  geom_col()

#next step - figure out how to show those spendings per state as a proportion of the total spending if that state

Data Visualization 3

Amy

Research Question:

What is the relationship between the local revenue of local education agencies and students’ literacy outcomes on statewide assessments in 2010? Additional areas of exploration included (a) average total local revenue and local revenue property taxes by state, (b) the relationship between outcomes and total local revenue vs. local revenue from property taxes, (c) the relationship between both types of funding and outcomes by state, and (d) relationship between both types of revenue and outcomes of student subgroups (e.g., race/ethnicity, disability status, language proficiency, SES).

Datasets:

  • EDFacts_rla_achievement_lea_2010_2019
  • NCES_CCD_fiscal_district_2010

Variables:

EDFacts_rla_achievement_lea_2010_2019

  • YEAR
  • STNAM
  • FIPST
  • LEAID
  • ALL_RLA00PCTPROF = Percentage of all students who scored at or above their state’s proficiency level on Reading/Language Arts.
  • MAM_RLA03PCTPROF = Percentage of Native American students that scored at or above proficient
  • MAS_RLA00PCTPROF = Percentage of Asian/Pacific Islander students that scored at or above proficient
  • MBL_RLA00PCTPROF = Percentage of Black students that scored at or above proficient
  • MHI_RLA00PCTPROF = Percentage of Hispanic students that scored at or above proficient
  • MTR_RLA00PCTPROF = Percentage of students with Two or More Races that scored at or above proficient
  • MWH_RLA03PCTPROF = Percentage of White students that scored at or above proficient
  • CWD_RLA00PCTPROF = Percentage of children with disabilities that scored at or above proficient
  • ECD_RLA00PCTPROF = Percentage of economically disadvantaged students that scored at or above proficient
  • LEP_RLA00PCTPROF = Percentage of limited English proficient students that scored at or above proficient

NCES_CCD_fiscal_district_2010:

  • NAME
  • STABBR
  • CENSUSID
  • V33 = LEA fall membership (i.e., number of students)
  • TOTALREV = LEA total revenue
  • TFEDREV = LEA total federal revenue
  • TSTREV = LEA total state revenue
  • TLOCREV = LEA total local revenue
  • T06 = LEA local revenue from property taxes

Cleaning and Wrangling

# Selected columns of interest. Filtered so that rows with suppressed values (e.g., -9, -2) for key variables of interest weren't included. 
viz3_fiscal2010 <- fiscal2010 %>% 
  select(LEAID, NAME, STABBR, CENSUSID, V33,
         TOTALREV, TFEDREV, TSTREV, TLOCREV, T06) %>% 
  filter(V33 > 0, TLOCREV > 0, T06 > 0) %>%
  rename_with(tolower) %>% 
  rename(totalstu = v33, tlocrevtaxes = t06) %>% 
  mutate(locrev_stu = tlocrev / totalstu,
         locrevtaxes_stu = tlocrevtaxes / totalstu)
# Narrowed the RLA 2010 file down to variables of interest. Selected percent proficient variables across all grades (00) for race ethnicity, disability, English Language Learner status, and economically disadvantaged subgroups. Transformed variable names to be lowercase, transformed state names to be title case, and replaced the suppressed values (e.g., PS, n/a, etc.) with NA.
viz3_rlalea00 <- rlalea_10 %>% 
  select(YEAR, 
         STNAM, 
         FIPST,
         LEAID, 
         ALL_RLA00PCTPROF, 
         MAM_RLA00PCTPROF,
         MAS_RLA00PCTPROF,
         MBL_RLA00PCTPROF,
         MHI_RLA00PCTPROF,
         MTR_RLA00PCTPROF, 
         MWH_RLA00PCTPROF,
         CWD_RLA00PCTPROF,
         ECD_RLA00PCTPROF,
         LEP_RLA00PCTPROF) %>% 
  rename_with(tolower)  %>% 
  mutate(stnam = str_to_title(stnam), 
         (across(all_rla00pctprof:lep_rla00pctprof, 
                 ~replace(., . %in% c("PS", 
                                      "n/a",    
                                      "LT50",   
                                      "LE5",    
                                      "LE20",   
                                      "LE10",   
                                      "GE99",   
                                      "GE95",   
                                      "GE90",   
                                      "GE80",   
                                      "GE50"), NA))))

# Next step was cleaning the percentage columns to change percentage ranges to average percentages. I used the method Daniel used on the course data webpage and applied it to all subgroups.

# All = across all students
viz3_rlalea00 <- viz3_rlalea00 %>% 
  tidyr::separate(all_rla00pctprof, c("all_lower", "all_upper"), sep = "-") %>% 
  mutate(
    all_upper = ifelse(is.na(all_upper), all_lower, all_upper),
    all_lower = as.numeric(all_lower),
    all_upper = as.numeric(all_upper)
    ) %>% 
  rowwise() %>% 
  mutate(meanpctprof_all = mean(c(all_lower, all_upper))) %>% 
  ungroup()

# mam = American Indian/Alaska Native
viz3_rlalea00 <- viz3_rlalea00 %>% 
  tidyr::separate(mam_rla00pctprof, c("mam_lower", "mam_upper"), sep = "-") %>% 
  mutate(
    mam_upper = ifelse(is.na(mam_upper), mam_lower, mam_upper),
    mam_lower = as.numeric(mam_lower),
    mam_upper = as.numeric(mam_upper)
    ) %>% 
  rowwise() %>% 
  mutate(meanpctprof_mam = mean(c(mam_lower, mam_upper))) %>% 
  ungroup()

# mas = Asian/Pacific Islander
viz3_rlalea00 <- viz3_rlalea00 %>% 
  tidyr::separate(mas_rla00pctprof, c("mas_lower", "mas_upper"), sep = "-") %>% 
  mutate(
    mas_upper = ifelse(is.na(mas_upper), mas_lower, mas_upper),
    mas_lower = as.numeric(mas_lower),
    mas_upper = as.numeric(mas_upper)
    ) %>% 
  rowwise() %>% 
  mutate(meanpctprof_mas = mean(c(mas_lower, mas_upper))) %>% 
  ungroup()

# mbl = Black
viz3_rlalea00 <- viz3_rlalea00 %>% 
  tidyr::separate(mbl_rla00pctprof, c("mbl_lower", "mbl_upper"), sep = "-") %>% 
  mutate(
    mbl_upper = ifelse(is.na(mbl_upper), mbl_lower, mbl_upper),
    mbl_lower = as.numeric(mbl_lower),
    mbl_upper = as.numeric(mbl_upper)
    ) %>% 
  rowwise() %>% 
  mutate(meanpctprof_mbl = mean(c(mbl_lower, mbl_upper))) %>% 
  ungroup()

# mhi = Hispanic/Latino
viz3_rlalea00 <- viz3_rlalea00 %>% 
  tidyr::separate(mhi_rla00pctprof, c("mhi_lower", "mhi_upper"), sep = "-") %>% 
  mutate(
    mhi_upper = ifelse(is.na(mhi_upper), mhi_lower, mhi_upper),
    mhi_lower = as.numeric(mhi_lower),
    mhi_upper = as.numeric(mhi_upper)
    ) %>% 
  rowwise() %>% 
  mutate(meanpctprof_mhi = mean(c(mhi_lower, mhi_upper))) %>% 
  ungroup()

# mtr = Multiracial
viz3_rlalea00 <- viz3_rlalea00 %>% 
  tidyr::separate(mtr_rla00pctprof, c("mtr_lower", "mtr_upper"), sep = "-") %>% 
  mutate(
    mtr_upper = ifelse(is.na(mtr_upper), mtr_lower, mtr_upper),
    mtr_lower = as.numeric(mtr_lower),
    mtr_upper = as.numeric(mtr_upper)
    ) %>% 
  rowwise() %>% 
  mutate(meanpctprof_mtr = mean(c(mtr_lower, mtr_upper))) %>% 
  ungroup()

# mwh = White
viz3_rlalea00 <- viz3_rlalea00 %>% 
  tidyr::separate(mwh_rla00pctprof, c("mwh_lower", "mwh_upper"), sep = "-") %>% 
  mutate(
    mwh_upper = ifelse(is.na(mwh_upper), mwh_lower, mwh_upper),
    mwh_lower = as.numeric(mwh_lower),
    mwh_upper = as.numeric(mwh_upper)
    ) %>% 
  rowwise() %>% 
  mutate(meanpctprof_mwh = mean(c(mwh_lower, mwh_upper))) %>% 
  ungroup()

# cwd = children with disabilities
viz3_rlalea00 <- viz3_rlalea00 %>% 
  tidyr::separate(cwd_rla00pctprof, c("cwd_lower", "cwd_upper"), sep = "-") %>% 
  mutate(
    cwd_upper = ifelse(is.na(cwd_upper), cwd_lower, cwd_upper),
    cwd_lower = as.numeric(cwd_lower),
    cwd_upper = as.numeric(cwd_upper)
    ) %>% 
  rowwise() %>% 
  mutate(meanpctprof_cwd = mean(c(cwd_lower, cwd_upper))) %>% 
  ungroup()

# ecd = economically disadvantaged
viz3_rlalea00 <- viz3_rlalea00 %>% 
  tidyr::separate(ecd_rla00pctprof, c("ecd_lower", "ecd_upper"), sep = "-") %>% 
  mutate(
    ecd_upper = ifelse(is.na(ecd_upper), ecd_lower, ecd_upper),
    ecd_lower = as.numeric(ecd_lower),
    ecd_upper = as.numeric(ecd_upper)
    ) %>% 
  rowwise() %>% 
  mutate(meanpctprof_ecd = mean(c(ecd_lower, ecd_upper))) %>% 
  ungroup()

# lep = limited English proficiency (English Language Learner)
viz3_rlalea00 <- viz3_rlalea00 %>% 
  tidyr::separate(lep_rla00pctprof, c("lep_lower", "lep_upper"), sep = "-") %>% 
  mutate(
    lep_upper = ifelse(is.na(lep_upper), lep_lower, lep_upper),
    lep_lower = as.numeric(lep_lower),
    lep_upper = as.numeric(lep_upper)
    ) %>% 
  rowwise() %>% 
  mutate(meanpctprof_lep = mean(c(lep_lower, lep_upper))) %>% 
  ungroup()

# Get ride of the "_lower" and "_upper" percentage columns since they won't be needed
viz3_rlalea00 <- viz3_rlalea00 %>% 
  select(year, 
         stnam, 
         fipst,
         leaid,
         contains("meanpctprof"))
# Pivoted the dataset longer to have a column for subgroup and a column for mean percentage proficient. 

viz3_rlalea00_long <- viz3_rlalea00 %>%
  pivot_longer(
        cols = contains("meanpctprof"),
        names_to = "subgroup",
        values_to = "meanpctprof",
        names_prefix = "meanpctprof_") 
# Joined the long file with the cleaned/narrowed fiscal data file. Used an inner join because I'm only interested in LEAs that have both student proficiency and fiscal data. 
viz3_rla00long_fiscal_2010 <- inner_join(viz3_rlalea00_long, 
                                         viz3_fiscal2010, 
                                         by = "leaid")

Potential Visualizations

These are the data visualizations I am thinking I will choose from for our final product. I’m not going to include all of these. My primary next step is to narrow them down. Please note that they are still a bit rough and need refinement (some more than others). Refinement plans include… finalizing color, trying out annotations and/or highlighting, and exploring alternate options for faceting by state. If I include the plots with fitted lines, I plan to update the colors and replace the legend with annotations.

First off, bar graphs. The first two summarize LEA local revenue and LEA local revenue from property taxes averaged by state. The third graph summarizes the average percentage of students scoring at/above proficient by state.

# Bar graph: Average LEA total local revenue ($ per student) by state
viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "all")  %>% 
  group_by(stnam) %>% 
  summarize(mean_locrev_stu = mean(locrev_stu)) %>% 
  ggplot(aes(x = mean_locrev_stu, y = fct_reorder(stnam, mean_locrev_stu))) +
  geom_col(color = "white", alpha = .6) +
  scale_x_continuous(expand = c(0, 0),
                     breaks = c(0, 2500, 5000, 7500, 10000, 12500),
                     labels = scales::dollar) +
  labs(title = "Average Local Revenue of LEAs",
       y = "State",
       x = "Dollar per student",
       caption = "Source: National Center for Education Statistics, 2010") +
  theme_minimal() +
  theme(plot.title.position = "plot",
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank())  

# Bar graph: Average LEA local revenue from property taxes ($ per student) by state
viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "all")  %>% 
  group_by(stnam) %>% 
  summarize(mean_locrevtaxes_stu = mean(locrevtaxes_stu)) %>% 
  ggplot(aes(x = mean_locrevtaxes_stu, 
             y = fct_reorder(stnam, mean_locrevtaxes_stu))) +
  geom_col(color = "white", alpha = .6) +
  scale_x_continuous(expand = c(0, 0),
                    breaks = c(0, 2500, 5000, 7500, 10000),
                     labels = scales::dollar) +
  labs(title = "Average Local Revenue of LEAs from Property Taxes",
       y = "State",
       x = "Dollar per student",
       caption = "Source: National Center for Education Statistics, 2010") +
  theme_minimal() +
  theme(plot.title.position = "plot",
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank())

# Bar graph of average RLA proficiency for each state
viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "all")  %>% 
  group_by(stnam) %>% 
  summarize(mean_pctprof = mean(meanpctprof, na.rm = T)) %>% 
  ggplot(aes(x = mean_pctprof, y = fct_reorder(stnam, mean_pctprof))) +
  geom_col(color = "white", alpha = .6) +
  scale_x_continuous(expand = c(0, 0),
                    breaks = c(0, 20, 40, 60, 80),
                    labels = c("0%", "20%", "40%", "60%", "80%")) +
  labs(title = "Average Proficiency in Reading/Language Arts",
       subtitle = "Students in Grades 3 through HS",
       y = "State",
       x = "Average Percentage",
       caption = "Source: National Center for Education Statistics, 2010") +
  theme_minimal() +
  theme(plot.title.position = "plot",
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank())  

Density ridges of local revenue from property taxes for LEAs by state. One thing I plan to do, if I include this plot, that I didn’t get to is sorting the states by mean revenue.

# Density ridges of local revenue from property taxes across LEAs by state
viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "all")  %>% 
  ggplot(aes(x = locrevtaxes_stu, y = stnam)) +
    geom_density_ridges(fill = "cornflower blue", color = "white", alpha = .8) +
    theme_minimal() +
  labs(title = "Local Revenue from Property Taxes in LEAS of Each State",
       y = "State",
       x = "Dollar per student",
       caption = "Source: National Center for Education Statistics, 2010") +
  theme_minimal() +
  theme(plot.title.position = "plot") + 
  scale_x_continuous(expand = c(0, 0)) +
  coord_cartesian(xlim = c(0, 25000))

Scatterplots showing the relationship between types of revenue and average percentage of students scoring at/above proficient on statewide assessments of reading/language arts. Note that I used a log transformation of the x-axis to spread the points out. Without the transformation, the bulk of the points were clustered near the bottom of the range. I think that I could use highlighting and annotations for the lowest and highest points.

# Scatterplots: All students, LEA total local revenue ($ per stu) and LEA local revenue from property taxes by approx. pct proficient
viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "all") %>% 
  ggplot(aes(x = locrev_stu, y = meanpctprof)) +
  geom_point(color = "gray30", fill = "gray30", alpha = .4) + 
  scale_x_log10(labels = scales::dollar) +
  labs(title = "Relationship between Local Revenue and RLA Proficiency",
       y = "Approximate Average Percent Proficient",
       x = "Dollar per Student",
       caption = "Source: National Center for Education Statistics, 2010") +
  theme_minimal() +
  theme(plot.title.position = "plot",
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank()) 

viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "all") %>% 
  ggplot(aes(x = locrevtaxes_stu, y = meanpctprof)) +
  geom_point(color = "gray30", fill = "gray30", alpha = .4) + 
  scale_x_log10(labels = scales::dollar) +
  labs(title = "Local Revenue from Property Taxes and RLA Proficiency",
       y = "Approximate Average Percent Proficient",
       x = "Dollar per Student",
       caption = "Source: National Center for Education Statistics, 2010") +
  theme_minimal() +
  theme(plot.title.position = "plot",
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank())

Scatterplots faceted by state.

# Scatterplot: All students - relationship between local revenue ($ per student) and mean % proficient, faceted by state
viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "all") %>% 
  ggplot(aes(x = locrev_stu, y = meanpctprof)) +
  facet_wrap(~stnam) +
  geom_point(color = "gray30", fill = "gray30", alpha = .4) + 
  scale_x_log10(labels = scales::dollar) +
  labs(title = "Total Local Revenue and RLA Proficiency",
       y = "Approximate Average Percent Proficient",
       x = "Dollar per Student",
       caption = "Source: National Center for Education Statistics, 2010") +
  theme_minimal() +
  theme(plot.title.position = "plot",
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank()) 

# Scatterplot: All students - relationship between local revenue from property taxes ($ per student) and mean % proficient, faceted by state
viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "all") %>% 
  ggplot(aes(x = locrevtaxes_stu, y = meanpctprof)) +
  facet_wrap(~stnam) +
  geom_point(color = "gray30", fill = "gray30", alpha = .4) + 
  scale_x_log10(labels = scales::dollar) +
  labs(title = "Local Revenue from Property Taxes and RLA Proficiency",
       y = "Approximate Average Percent Proficient",
       x = "Dollar per Student",
       caption = "Source: National Center for Education Statistics, 2010") +
  theme_minimal() +
  theme(plot.title.position = "plot",
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank())

Scatterplots showing the relationship between revenue and outcomes, faceted by subgroup. These are still pretty rough (e.g., labels need work).

# Scatterplot: Relationship between local revenue ($ per student) and mean % proficient, faceted by subgroup
viz3_rla00long_fiscal_2010 %>% 
  ggplot(aes(x = locrev_stu, y = meanpctprof)) +
  facet_wrap(~subgroup) +
  geom_point(color = "gray30", fill = "gray30", alpha = .4) + 
  scale_x_log10(labels = scales::dollar) +
  labs(title = "Total Local Revenue and RLA Proficiency",
       y = "Approximate Average Percent Proficient",
       x = "Dollar per Student",
       caption = "Source: National Center for Education Statistics, 2010") +
  theme_minimal() +
  theme(plot.title.position = "plot",
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank()) 

# Scatterplot: Relationship between local revenue from property taxes ($ per student) and mean % proficient, faceted by subgroup
viz3_rla00long_fiscal_2010 %>% 
  ggplot(aes(x = locrevtaxes_stu, y = meanpctprof)) +
  facet_wrap(~subgroup) +
  geom_point(color = "gray30", fill = "gray30", alpha = .4) +
  scale_x_log10(labels = scales::dollar) +
  labs(title = "Local Revenue from Property Taxes and RLA Proficiency",
       y = "Approximate Average Percent Proficient",
       x = "Dollar per Student",
       caption = "Source: National Center for Education Statistics, 2010") +
  theme_minimal() +
  theme(plot.title.position = "plot",
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank())

Playing around with fitting lines. These are still rough. I need to work on the legend and want to try replacing it with annotations. Also could consider changing the color to highlight a specific group or pick a different color palette.

viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup != "all") %>% 
  ggplot() +
  geom_smooth(method = lm, 
              se = F, 
              aes(x = locrevtaxes_stu, 
                  y = meanpctprof, 
                  color = subgroup)) +
  scale_x_log10(labels = scales::dollar) +
  labs(title = "Local Revenue from Property Taxes and RLA Proficiency",
       y = "Approximate Average Percent Proficient",
       x = "Dollar per Student",
       caption = "Source: National Center for Education Statistics, 2010") +
  theme_minimal() +
  theme(plot.title.position = "plot",
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank())

Fitted lines faceted by state (also very rough).

viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup != "all") %>% 
  ggplot() +
  geom_smooth(method = lm, se = F, aes(x = locrevtaxes_stu, y = meanpctprof, color = subgroup)) +
  facet_wrap(~stnam) +
  scale_x_log10(labels = scales::dollar) +
  labs(title = "Local Revenue from Property Taxes and RLA Proficiency",
       y = "Approximate Average Percent Proficient",
       x = "Dollar per Student",
       caption = "Source: National Center for Education Statistics, 2010") +
  theme_minimal() +
  theme(plot.title.position = "plot",
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank())

Created maps displaying (a) average LEA local revenue in each state and (b) average LEA local revenue from property taxes in each state. I still need to fill in the missing states and want to try out different color palettes.

viz3_map_dfprep <- viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "all") %>% 
  group_by(stnam) %>% 
  summarize(mean_locrev_stu = mean(locrev_stu),
            mean_locrevtaxes_stu = mean(locrevtaxes_stu))

viz3_us <- usa_sf() 

viz3_map_df <- left_join(viz3_map_dfprep, 
                      viz3_us, 
                      by = c("stnam" = "name"))

viz3_map_df %>% 
  ggplot(aes(geometry = geometry, fill = mean_locrev_stu)) +
  geom_sf(color = "white", size = 0) +
    scale_fill_viridis(option = "magma", 
                       name = "Dollar per student",
                                         breaks = c(0, 2500, 5000, 7500, 10000, 12500),
                                         labels = c("$0", 
                                                    "$2,500", 
                                                    "$5,000", 
                                                    "$7,500", 
                                                    "$10,000",
                                                    "$12,500")) +
  theme_void() +
  labs(title = "Average LEA Total Local Revenue",
       caption = "Source: National Center for Education Statistics, 2010") +
  theme(plot.title.position = "plot") 

viz3_map_df %>% 
  ggplot(aes(geometry = geometry, fill = mean_locrevtaxes_stu)) +
  geom_sf(color = "white", size = 0) +
    scale_fill_viridis(option = "magma", 
                       name = "Dollar per student",
                                         breaks = c(0, 2500, 5000, 7500, 10000),
                                         labels = c("$0", 
                                                    "$2,500", 
                                                    "$5,000", 
                                                    "$7,500", 
                                                    "$10,000")) +
  theme_void() +
  labs(title = "Average LEA Local Revenue from Property Taxes",
       caption = "Source: National Center for Education Statistics, 2010") +
  theme(plot.title.position = "plot") 

Other Data Visualizations

These are some of the preliminary visualizations I did that I don’t think I’m moving forward with. Because of this, refinement is minimal.

# Scatterplot: Total local LEA revenue from property taxes ($ per stu) x approx. pct proficient, color = subgroup
viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup != "all") %>% 
  ggplot(aes(x = locrevtaxes_stu, y = meanpctprof, color = subgroup)) +
  geom_point(alpha = .4) + 
  scale_x_log10(labels = scales::dollar) +
  theme_minimal() +
  labs(title = "Revenue from property tax x meanpctprof, color by subgroup") +
  theme(plot.title.position = "plot",
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank())

Fitted lines for specific states:

# fitted lines in a few specific states
viz3_rla00long_fiscal_2010 %>% 
  filter(stnam == "Montana") %>% 
  ggplot() +
  geom_smooth(method = lm, 
              se = F, 
              aes(x = locrevtaxes_stu, 
                  y = meanpctprof, 
                  color = subgroup)) +
  scale_x_log10(labels = scales::dollar) +
  theme_minimal() +
  theme(plot.title.position = "plot",
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank())  +
  labs(title = "Montana")

viz3_rla00long_fiscal_2010 %>% 
  filter(stnam == "South Dakota") %>% 
  ggplot() +
  geom_smooth(method = lm, 
              se = F, 
              aes(x = locrevtaxes_stu, 
                  y = meanpctprof, 
                  color = subgroup)) +
  scale_x_log10(labels = scales::dollar) +
  theme_minimal() +
  theme(plot.title.position = "plot",
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank()) +
  labs(title = "South Dakota")

viz3_rla00long_fiscal_2010 %>% 
  filter(stnam == "New Jersey") %>% 
  ggplot() +
  geom_smooth(method = lm, 
              se = F, 
              aes(x = locrevtaxes_stu, 
                  y = meanpctprof, 
                  color = subgroup)) +
  scale_x_log10(labels = scales::dollar) +
  theme_minimal() +
  theme(plot.title.position = "plot",
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank()) +
  labs(title = "New Jersey")

# Fitting a line over data points for local revenue x meanpctprof by state
viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "all") %>% 
  ggplot(aes(x = locrev_stu, y = meanpctprof)) +
  geom_smooth(method = "lm") +
  facet_wrap(~stnam) +
  geom_point(alpha = .1) + 
  scale_x_log10(labels = scales::dollar) 

# Scatterplot: Relationship between local revenue from property taxes ($ per student) and mean % proficient, faceted by state, color by subgroup 
viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup != "all") %>% 
  ggplot(aes(x = locrev_stu, y = meanpctprof, color = subgroup)) +
  facet_wrap(~stnam) +
  geom_point(alpha = .4) + 
  scale_x_log10(labels = scales::dollar)

Scatterplots for each student subgroup between local revenue from property tax and % proficient, faceted by state. The subgroup is indicated in the title.

# Scatterplots: Relationship between local revenue from property taxes ($ per student) and mean % proficient, faceted by state for each subgroup 

viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "mam") %>% 
  ggplot(aes(x = locrev_stu, y = meanpctprof)) +
  facet_wrap(~stnam) +
  geom_point(alpha = .4) + 
  scale_x_log10(labels = scales::dollar) +
  labs(title = "American Indian/Alaska Native")

viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "mas") %>% 
  ggplot(aes(x = locrev_stu, y = meanpctprof)) +
  facet_wrap(~stnam) +
  geom_point(alpha = .4) + 
  scale_x_log10(labels = scales::dollar) +
  labs(title = "Asian/Pacific Islander")

viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "mhi") %>% 
  ggplot(aes(x = locrev_stu, y = meanpctprof)) +
  facet_wrap(~stnam) +
  geom_point(alpha = .4) + 
  scale_x_log10(labels = scales::dollar) +
  labs(title = "Hispanic/Latino")

viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "mbl") %>% 
  ggplot(aes(x = locrev_stu, y = meanpctprof)) +
  facet_wrap(~stnam) +
  geom_point(alpha = .4) + 
  scale_x_log10(labels = scales::dollar) +
  labs(title = "Black")

viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "mwh") %>% 
  ggplot(aes(x = locrev_stu, y = meanpctprof)) +
  facet_wrap(~stnam) +
  geom_point(alpha = .4) + 
  scale_x_log10(labels = scales::dollar) +
  labs(title = "White")

viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "mtr") %>% 
  ggplot(aes(x = locrev_stu, y = meanpctprof)) +
  facet_wrap(~stnam) +
  geom_point(alpha = .4) + 
  scale_x_log10(labels = scales::dollar) +
  labs(title = "Multiracial")

viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "cwd") %>% 
  ggplot(aes(x = locrev_stu, y = meanpctprof)) +
  facet_wrap(~stnam) +
  geom_point(alpha = .4) + 
  scale_x_log10(labels = scales::dollar) +
  labs(title = "Students with Disabilities")

viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "ecd") %>% 
  ggplot(aes(x = locrev_stu, y = meanpctprof)) +
  facet_wrap(~stnam) +
  geom_point(alpha = .4) + 
  scale_x_log10(labels = scales::dollar) +
  labs(title = "Economically Disadvantaged")

viz3_rla00long_fiscal_2010 %>% 
  filter(subgroup == "lep") %>% 
  ggplot(aes(x = locrev_stu, y = meanpctprof)) +
  facet_wrap(~stnam) +
  geom_point(alpha = .4) + 
  scale_x_log10(labels = scales::dollar) +
  labs(title = "Limited English Proficiency")